/* 
Calculate entries in Table 5 from raw data
*/

clear all
cd "C:\Users\bgilbert_a\Dropbox\Supply Side Oil Gas\BigWellData"
set more off

/***********
Calculate Total Wells, % Gas, % Oil, gg, Tgt Gas Value, Tgt Oil Value, 
Assoc Gas Value, Assoc Oil Value, Well Value, and Well Depth from Table 5.
************/

use "header.dta", clear
keep if prod_type == "GAS" | prod_type =="OIL"

gen date = date(first_prod_date,"MDY")
format date %td
gen month = mofd(date)
format month %tm 

keep if basin == "ANADARKO BASIN" | basin == "CHAUTAUQUA PLATFORM" | ///
		basin == "EAST TEXAS BASIN" | basin == "FT WORTH BASIN" | basin == "PERMIAN BASIN"
		
gen oil_well = 0
replace oil_well = 1 if prod_type == "OIL"

gen gas_well = 0
replace gas_well = 1 if prod_type == "GAS"

gen gather = 0
replace gather = 1 if gas_gath_name_1 != "(N/A)" & oil_well==1

gen oil_targ12f = first12_liq/oil_well
gen gas_targ12f = first12_gas/gas_well
gen oil_assoc12f = first12_liq/gas_well
gen gas_assoc_all12f = first12_gas/oil_well
gen gas_assoc_capt12f = first12_gas/oil_well*gather

replace gather = . if gas_well==1

collapse (sum) total_wells = oil_or_gas gas_wells = gas_well oil_wells = oil_well ///
		(mean) mean_depth = total_depth mean_oil = first12_liq mean_gas = first12_gas ///
		oil_targ12f gas_targ12f oil_assoc12f gas_assoc_all12f gas_assoc_capt12f ///
		gather, by(basin)

*Calculate proportions of oil and gas wells
gen gas_prop = gas_wells/total_wells
gen oil_prop = oil_wells/total_wells

*Get commodity values with avg gas price = $5.30/Mcf and avg oil price = $73.77/bbl

gen tgt_gas_value = 5.30*gas_targ12f

gen tgt_oil_value = 73.77*oil_targ12f

gen ass_gas_value = 5.30*gas_assoc_capt12f

gen ass_oil_value = 73.77*oil_assoc12f

*Get average well value

gen well_value = gas_prop*(tgt_gas_value + ass_oil_value) + oil_prop*(tgt_oil_value + ass_gas_value)

*In Table 5: total_wells is column 1, gas_prop is column 2, oil prop is column 3, 
* gather is column 8, tgt_gas_value through ass_oil_value are columns 9 through 12,
* well_value is column 13, and mean_depth is column 14


/***********
Calculate psi_g and psi_o (columns 6 and 7) from Table 5.
************/

use "header.dta", clear
keep if prod_type == "GAS" | prod_type =="OIL"

gen date = date(first_prod_date,"MDY")
format date %td
gen month = mofd(date)
format month %tm 

keep if basin == "ANADARKO BASIN" | basin == "CHAUTAUQUA PLATFORM" | ///
		basin == "EAST TEXAS BASIN" | basin == "FT WORTH BASIN" | basin == "PERMIAN BASIN"
		
gen oil_well = 0
replace oil_well = 1 if prod_type == "OIL"

gen gas_well = 0
replace gas_well = 1 if prod_type == "GAS"

gen gather = 0
replace gather = 1 if gas_gath_name_1 != "(N/A)" & oil_well==1

gen oil_targ12f = first12_liq/oil_well
gen gas_targ12f = first12_gas/gas_well
gen oil_assoc12f = first12_liq/gas_well
gen gas_assoc_all12f = first12_gas/oil_well
gen gas_assoc_capt12f = first12_gas/oil_well*gather

replace gather = . if gas_well==1


* associated liquids, from first12
preserve
collapse (sum) first12_gas ass_f12_liq = first12_liq if prod_type == "GAS", by(basin month)
sort basin month
gen psi_ot12f = ass_f12_liq*5.8/first12_gas
order basin psi_ot12f 
encode basin, gen(basin_no)
#delimit ;
reshape wide basin psi_ot12f first12_gas ass_f12_liq , i(month) j(basin_no);
#delimit cr
sort month
tsset month
tempfile temp1    /* create a temporary file */
save "`temp1'"      /* save memory into the temporary file */
* Anadarko 1, Chautauqua 2, East Texas 3, Ft Worth 4, Permian 5
restore

*  associated gas, from first12
replace first12_gas=0 if gas_gath_name_1 == "(N/A)"
preserve
collapse (sum) ass_f12_gas = first12_gas first12_liq if prod_type == "OIL", by(basin month)
sort basin month
gen psi_gt12f = ass_f12_gas/(5.8*first12_liq)
order basin psi_gt12f 
encode basin, gen(basin_no)
#delimit ;
reshape wide basin psi_gt12f first12_liq ass_f12_gas, i(month) j(basin_no);
#delimit cr
sort month
tempfile temp2    /* create a temporary file */
save "`temp2'"      /* save memory into the temporary file */
* Anadarko 1, Chautauqua 2, East Texas 3, Ft Worth 4, Permian 5
restore

use "`temp1'", clear
* Anadarko 1, Chautauqua 2, East Texas 3, Ft Worth 4, Permian 5
* using means, associated oil
bootstrap r(mean), reps(1000): summarize psi_ot12f1, detail
est sto meanbso1f12
bootstrap r(mean), reps(1000): summarize psi_ot12f2, detail
est sto meanbso2f12
bootstrap r(mean), reps(1000): summarize psi_ot12f3, detail
est sto meanbso3f12
bootstrap r(mean), reps(1000): summarize psi_ot12f4, detail
est sto meanbso4f12
bootstrap r(mean), reps(1000): summarize psi_ot12f5, detail
est sto meanbso5f12

use "`temp2'", clear
* using means, associated gas
bootstrap r(mean), reps(1000): summarize psi_gt12f1, detail
est sto meanbsg1f12
bootstrap r(mean), reps(1000): summarize psi_gt12f2, detail
est sto meanbsg2f12
bootstrap r(mean), reps(1000): summarize psi_gt12f3, detail
est sto meanbsg3f12
bootstrap r(mean), reps(1000): summarize psi_gt12f4, detail
est sto meanbsg4f12
bootstrap r(mean), reps(1000): summarize psi_gt12f5, detail
est sto meanbsg5f12

* associated oil psi_o table
esttab meanbso1f12 meanbso2f12 meanbso3f12 meanbso4f12 meanbso5f12, se
* associated gas psi_g table
esttab meanbsg1f12 meanbsg2f12 meanbsg3f12 meanbsg4f12 meanbsg5f12, se


/***********
Estimate alpha_g and alpha_o (columns 4 and 5) from Table 5.
************/

use "basin_production.dta", clear /* Fields taken from Enverus/DrillingInfo HPDI PDEN_PROD data */

* basin_production.dta has monthly production from individual wells in
* Permian, Ft Worth, Anadarko, Chautauqua, and East Texas

xtset entity_id prod_month

*We will require at least 20 observations for a well to be used in the decline 
*rate estimation. Might not be 20 contiguous observations.
*This drops a relatively small number of observations: 130k out of 6 mil+.

by entity_id: gen obs = _N

drop if obs < 20

*Now we make out time since "drilling" variable

by entity_id: gen t = _n

*Generate logs of gas and liquids

gen ln_gas = ln(gas)

gen ln_liq = ln(liq)

*Basin-level average decline rate estimation

set more off
xtreg ln_gas t if basin == "ANADARKO BASIN", fe vce(cluster entity_id)
est sto ang
xtreg ln_liq t if basin == "ANADARKO BASIN", fe vce(cluster entity_id)
est sto anl

xtreg ln_gas t if basin == "CHAUTAUQUA PLATFORM", fe vce(cluster entity_id)
est sto chg
xtreg ln_liq t if basin == "CHAUTAUQUA PLATFORM", fe vce(cluster entity_id)
est sto chl

xtreg ln_gas t if basin == "EAST TEXAS BASIN", fe vce(cluster entity_id)
est sto etg
xtreg ln_liq t if basin == "EAST TEXAS BASIN", fe vce(cluster entity_id)
est sto etl

xtreg ln_gas t if basin == "FT WORTH BASIN", fe vce(cluster entity_id)
est sto fwg
xtreg ln_liq t if basin == "FT WORTH BASIN", fe vce(cluster entity_id)
est sto fwl

xtreg ln_gas t if basin == "PERMIAN BASIN", fe vce(cluster entity_id)
est sto peg
xtreg ln_liq t if basin == "PERMIAN BASIN", fe vce(cluster entity_id)
est sto pel

esttab ang chg etg fwg peg, scalars(N r2) b(a2) se(a3) star(* 0.10 ** 0.05 *** 0.01) 
esttab anl chl etl fwl pel, scalars(N r2) b(a2) se(a3) star(* 0.10 ** 0.05 *** 0.01) 
